
# library(pacman)
# p_load(dplyr, tidyr, readr, wbstats, DBI, RSQLite, countrycode, patentsview, purrr, modelsummary, fixest, kableExtra) 

library(dplyr)
library(tidyr)
library(readr)
library(wbstats)
library(DBI)
library(RSQLite)
library(countrycode)
library(patentsview)
library(purrr)
library(modelsummary)
library(fixest)
library(kableExtra)

options(modelsummary_factory_default = 'kableExtra')

##### PREPPING DATA #####

GDP <- wb_data("NY.GDP.MKTP.CD", start_date = 1990, end_date = 2023) %>% rename(year = date) %>%
  mutate(country = ifelse(country == "Turkiye", "Turkey", country)) %>%
  mutate(country_code = countrycode(sourcevar = country,
                                    origin = "country.name",
                                    destination = "iso3c")) %>%
  rename("GDP" = "NY.GDP.MKTP.CD") %>%
  select(country_code, year, GDP)

EXP <- wb_data("NE.EXP.GNFS.CD", start_date = 1990, end_date = 2023) %>% rename(year = date)  %>%
  mutate(country = ifelse(country == "Turkiye", "Turkey", country)) %>%
  mutate(country_code = countrycode(sourcevar = country,
                                    origin = "country.name",
                                    destination = "iso3c")) %>%
  rename("EXP" = "NE.EXP.GNFS.CD") %>%
  select(country_code, year, EXP)

POP <- wb_data("SP.POP.TOTL", start_date = 1990, end_date = 2023) %>% rename(year = date)  %>%
  mutate(country = ifelse(country == "Turkiye", "Turkey", country)) %>%
  mutate(country_code = countrycode(sourcevar = country,
                                    origin = "country.name",
                                    destination = "iso3c")) %>%
  rename("POP" = "SP.POP.TOTL") %>%
  select(country_code, year, POP)

IND <- wb_data("NV.IND.TOTL.ZS", start_date = 1990, end_date = 2023) %>% rename(year = date)  %>%
  mutate(country = ifelse(country == "Turkiye", "Turkey", country)) %>%
  mutate(country_code = countrycode(sourcevar = country,
                                    origin = "country.name",
                                    destination = "iso3c"))  %>%
  rename("IND" = "NV.IND.TOTL.ZS") %>%
  select(country_code, year, IND)

ICT <- wb_data("BX.GSR.CCIS.ZS", start_date = 1990, end_date = 2023) %>% rename(year = date)  %>%
  mutate(country = ifelse(country == "Turkiye", "Turkey", country)) %>%
  mutate(country_code = countrycode(sourcevar = country,
                                    origin = "country.name",
                                    destination = "iso3c"))  %>%
  rename("ICT" = "BX.GSR.CCIS.ZS") %>%
  select(country_code, year, ICT)

con <- dbConnect(RSQLite::SQLite(), "iso_standards.sqlite")

participation <- dbReadTable(con, "participants")

memberships <- participation %>%
  mutate(country = countrycode::countrycode(sourcevar = country,
                                            origin = "country.name",
                                            destination = "country.name"))

all_sectors <- memberships %>%
  drop_na(sector) %>%
  pull(sector) %>%
  unique()

dbDisconnect(con)

#### Fetching patents data ####

# This is very time consuming to run

# dates <- seq(as_date("2004-01-01"), as_date("2023-01-01"), "months")
# 
# fields <- c(
#   c("patent_number", "patent_title", "patent_date", "patent_year",
#     "inventor_country", "patent_num_combined_citations", "cpc_subsection_id"),
#   get_fields(endpoint = "patents", groups = c("cpcs", "assignees"))
# )
# 
# for (i in 1:length(dates)) {
#   
#   date1 <- as.character(dates[i])
#   date2 <- as.character(dates[i+1])
#   
#   query <- with_qfuns(
#     and(
#       gte(patent_date = date1),
#       lt(patent_date = date2))
#   )
#   
#   df <- search_pv(query, endpoint = "patents", fields = fields, all_pages = TRUE)
#   
#   saveRDS(df, file = paste0("./patent_query/query_", dates[i], "_", dates[i+1], ".rds"))
#   
#   message(paste0("Finished query on ", dates[i]))
#   
#   Sys.sleep(5)
#   
# }
# 
# patentset <- lapply(list.files("./patent_query/", full.names = TRUE), read_rds)
# 
# patentsdata <- list()
# 
# for (i in 1:length(patentset)) {
#   
#   patentsdata[[i]] <- patentset[i][[1]]$data$patents
#   
# }
# 
# patents_all <- bind_rows(patentsdata)

#### Coding patents to standard ICT codes ####

# patents_all <- patents_all %>%
#   unnest(inventors) %>%
#   select(-inventor_key_id) %>%
#   unique()
# 
# patents_all <- patents_all %>%
#   mutate(country = countrycode::countrycode(sourcevar = inventor_country,
#                                             origin = "iso2c",
#                                             destination = "iso3n"))
# 
# # DOCUMENT USED AS CONCORDANCE:
# # https://www.oecd-ilibrary.org/docserver/ab16c396-en.pdf?expires=1710500639&id=id&accname=guest&checksum=CBEC0C3E079F9E0B52BF6B3D5C7EDAFB
# 
# patents_tech <- patents_all %>%
#   select(patent_number, patent_year, cpcs) %>%
#   unnest(cols = c(cpcs)) %>%
#   mutate(tech_area = ifelse(
#     str_detect(cpc_subgroup_id, 
#                "H03K|H03L|H03M|H04B1/69-1/719|H04J|H04L|H04L9|H04L12/14|
#     H04M3-13|H04M3-19|H04M3-99|H04Q|
#     H04B1/00-1/68|H04B1/72-1/76|H04B3-17|H04H|H04B1/59|H04B5|H04B7"), 
#     # excluding: 
#     # H04L9, H04L12/14 
#     # H04B1/59, H04B5, H04B7
#     # H04W4/24, H04W12
#     "high_speed_network", 
#     
#     ifelse(str_detect(cpc_subgroup_id,
#                       "H04B7|H04W|H04W4/24|H04W12"),
#            # excluding H04W4/24, H04W12
#            "mobile_communication",
#            
#            ifelse(str_detect(cpc_subgroup_id,
#                              "G06F12/14|G06F21|G06K19|G09C|G11C8/20|H04K|H04L9|H04M1/66-665|H04M1/667-675|H04M1/68-70|H04M1/727|H04N7/167-7/171|H04W12|
#                                         G06Q20|G07F7/08-12|G07G1/12-1/14|H04L12/14|H04W4/24|G06Q30/02"),
#                   "security",
#                   
#                   ifelse(str_detect(cpc_subgroup_id,
#                                     "G08B1/08|G08B3/10|G08B5/22-38|G08B7/06|G08B13/18-13/196|G08B13/22-26|G08B25|G08B26|G08B27|G08C|G08G1/01-065|G06F17/40|H04W84/18|
#                                                H04B1/59|H04B5|G01S13/74-84|G01V3|G01V15|H04W84/10"),
#                          "sensor_and_device_network",
#                          
#                          ifelse(str_detect(cpc_subgroup_id,
#                                            "G06F5|G06F7|G06F9|G06F11|G06F13|G06F15/00|G06F15/16-15/177|G06F15/18|G06F 15/76-15/82"),
#                                 "high_speed_computing",
#                                 
#                                 ifelse(str_detect(cpc_subgroup_id,
#                                                   "G06F3/06–3/08|G06F12|G06K1-7|G06K13|G11B|G11C|H04N5/78-5/907|G06F12/14|G11C8/20"),
#                                        # (exclude G06F12/14) (exclude G11C8/20) 
#                                        "large_capacity_high_speed_storage",
#                                        
#                                        ifelse(str_detect(cpc_subgroup_id,
#                                                          "G06F17/30|G06F17/40|G06F17/00|G06F17/10-17/18|G06F17/50|G06F19|G06Q10|G06Q30|G06Q40|G06Q50|G06Q90|G06Q99|G08G"),
#                                               # exclude: G08G1/01-065, G08G1/0962-0969
#                                               "large_capacity_information_analysis",
#                                               
#                                               ifelse(str_detect(cpc_subgroup_id,
#                                                                 "G06F17/20-17/28|G06K9|G06T7|G10L13/027|G10L15|G10L17|G10L25/63|G10L25/66|G06F15/18"),
#                                                      "cognition_and_meaning_understanding",
#                                                      
#                                                      ifelse(str_detect(cpc_subgroup_id,
#                                                                        "H04M1|G06F3/01-3/0489|G06F3/14-3/153|G06F3/16|G06K11|G06T11/80|G08G1/0962-0969|G09B5|G09B7|G09B9|H04M1/66-665|H04M1/667-675|H04M1/68-70|H04M1/727|G06F17/50|G06K9|G06T11|G06T13|G06T15|G06T17-19"),
#                                                             # exclude H04M1/66-665, H04M1/667-675, H04M1/68-70, H04M1/727
#                                                             "human_interface",
#                                                             
#                                                             ifelse(str_detect(cpc_subgroup_id,
#                                                                               "H04N|G06T1-9|G06T11|G06T13|G06T15|G06T17-19|G09G|H04N5/78-5/907|H04N7/167-7/171|G06T7|G06T11/80|
#                                                                                          H04R|H04S|G10L|G10L13/027|G10L15|G10L17|G10L25/63|G10L25/66"),
#                                                                    # excluding H04N5/78-5/907, H04N7/167-7/171, G06T7, G06T11/80, G10L13/027, G10L15, G10L17, G10L25/63,66
#                                                                    "imaging_and_sound_technology",
#                                                                    
#                                                                    ifelse(str_detect(cpc_subgroup_id,
#                                                                                      "H03B|H03C|H03D|H03F|H03G|H03H|H03J|H01B11|H01L29-33|H01L21|H01L25|H01L27|H01L43-51|G02B6|G02F|H01S5|B81B7/02|B82Y10|H01P|H01Q"),
#                                                                           "information_communication_device",
#                                                                           
#                                                                           ifelse(str_detect(cpc_subgroup_id,
#                                                                                             "G01S|G01V3|G01V8|G01V15"),
#                                                                                  "electronic_measurement",
#                                                                                  
#                                                                                  ifelse(str_detect(cpc_subgroup_id,
#                                                                                                    "G06F3/00|G06F3/05|G06F3/09|G06F3/12|G06F3/13|G06F3/18|
#                                                                                                               G06E|G06F1|G06F15/02|G06F15/04|G06F15/08-15/14|G06G7|G06J|G06K15|G06K17|G06N|H04M15|H04M17"),
#                                                                                         "others",
#                                                                                         NA)))))))))))))) %>%
#   mutate(tech_area = ifelse(str_detect(cpc_subgroup_id, "H04L9|H04L12/14|H04B1/59|H04B5|H04B7|H04W4/24|H04W12|
#                                        H04W4/24|H04W12|
#                                        G06F12/14|G11C8/20|
#                                        G08G1/01-065|G08G1/0962-0969|
#                                        H04M1/66-665|H04M1/667-675|H04M1/68-70|H04M1/727|
#                                        H04N5/78-5/907|H04N7/167-7/171|G06T7|G06T11/80|G10L13/027|G10L15|G10L17|G10L25/63|G10L25/66"),
#                             NA, tech_area)) %>%
#   filter(!is.na(tech_area)) %>%
#   select(patent_number, patent_year, tech_area) %>%
#   distinct(.keep_all = TRUE)
# 
# patents_tech <- patents_tech %>%
#   inner_join(patents_all, by = join_by(patent_number, patent_year))
# 
# saveRDS(patents_tech, file = "Appendix_E_Patentstech.rds")

##### MERGING WITH STANDARDS DATA #####

patents_tech <- read_rds("Appendix_E_Data_Patentstech.rds")

country <- patents_tech %>%
  select(patent_number, patent_year, assignees) %>%
  unnest(assignees) %>%
  select(patent_number, patent_year, assignee_country) %>%
  unique() %>% 
  group_by(patent_year, patent_number) %>%
  add_count() %>%
  mutate(share = 1/n)

patents <- patents_tech %>%
  select(patent_number, patent_year, patent_title, patent_date, tech_area) %>%
  distinct(patent_number, patent_year, patent_title, patent_date, tech_area, .keep_all = TRUE) %>%
  left_join(country, by = join_by(patent_number, patent_year)) %>%
  distinct(patent_number, patent_year, patent_title, patent_date, assignee_country, tech_area, share, .keep_all = TRUE) %>%
  drop_na(assignee_country) %>%
  mutate(country = countrycode::countrycode(sourcevar = assignee_country,
                                            origin = "iso2c",
                                            destination = "country.name")) %>%
  group_by(country) %>%
  tidyr::complete(patent_year = seq(2004, 2022), fill = list(n = 0)) %>%
  ungroup() %>%
  rename(year = patent_year) %>%
  select(-assignee_country) 

member_patents <- patents %>%
  left_join(memberships %>% 
              filter(
                committee %in% c("JTC 1")) %>%
              mutate(membership = ifelse(membership == "O-member", "O",
                                         ifelse(membership %in% c("P-member", "Secretariat"), "P",
                                                ifelse(is.na(membership), "N", membership)))),
            select(country, year, membership, committee, title),
            by = join_by(year, country), relationship = "many-to-many")


member_patents_count <- member_patents %>%
  group_by(year, country) %>%
  summarise(patents_count = sum(share)) %>%
  ungroup()

member_patents_enter <- member_patents %>%
  mutate(committee_member = ifelse(!is.na(committee), 1, 0)) %>%
  select(year, country, committee_member, membership) %>%
  unique() %>%
  left_join(member_patents_count, by = join_by(year, country)) %>%
  mutate(membership = ifelse(membership == "O-member", "O",
                             ifelse(membership %in% c("P-member", "Secretariat"), "P",
                                    membership))) %>%
  mutate(membership = ifelse(is.na(membership), "N", membership)) %>%
  unique() %>%
  group_by(country) %>%
  arrange(-desc(year)) %>%
  mutate(country_code = countrycode::countrycode(sourcevar = country,
                                                 origin = "country.name",
                                                 destination = "iso3c")) %>%
  left_join(GDP, by = join_by(year, country_code)) %>%
  left_join(POP, by = join_by(year, country_code)) %>%
  left_join(EXP, by = join_by(year, country_code)) %>%
  left_join(IND, by = join_by(year, country_code)) %>%
  left_join(ICT, by = join_by(year, country_code)) %>%
  mutate(GDPPOP = GDP/POP) %>%
  mutate(EXPGDP = EXP/GDP)


##### ANALYSIS #####

mod1 <- fepois(patents_count ~ committee_member + GDPPOP + log(EXPGDP) + IND + ICT
               | country + year,
               cluster = "country+year",
               data = member_patents_enter)

mod2 <- fepois(patents_count ~ membership + GDPPOP + log(EXPGDP) + IND + ICT
               | country + year,
               cluster = "country+year",
               data = member_patents_enter %>% filter(committee_member == 1))

cm <- c("committee_member" = "Membership in TC", 
        "membershipP" = "P-member in TC",
        "GDPPOP" = "GDP per capita",
        "log(EXPPOP)" = "Exports per GDP (ln)",
        "IND" = "Industry value added (% of GDP)",
        "ICT" = "ICT % of service exports")

models <- list(mod1, mod2) 
names(models) <- c("TC membership", "Type of membership")

rows <- tibble::tribble(~start, ~mod1, ~mod2, 
                        "Time series", "2004-2022", "2004-2022") 

tabsummary <- modelsummary(models,
                           fmt = 3,
                           coef_map = cm,
                           #output = "latex",
                           stars = TRUE,
                           statistic = c("{std.error}"),
                           gof_omit = 'AIC|BIC|Within|Std.Errors|R2|FE',
                           gof_map = c("nobs", "r.squared", "rmse"),
                           notes = c("Fixed effects: Country and year.",
                                     "Clustered standard errors by country and year.",
                                     "Control variables (WDI data): GDP per capita, Exports, Industry value added, ICT service exports",
                                     "Model: Poisson."),
                           add_rows = rows,
                           coef_omit = c("Intercept"))

tabsummary %>%
  add_header_above(c(" " = 1, "Dependent variable: Number of ICT patents" = 2)) %>%
  kable_styling(font_size = 10, full_width = FALSE) 

predict(mod1, newdata = tibble(country = c(rep("United States", 2)), 
                               year = rep("2015", 2),
                               committee_member = c(0,1), 
                               GDPPOP = rep(mean(member_patents_enter$GDPPOP, na.rm = TRUE), 2),
                               EXPGDP = rep(mean(member_patents_enter$EXPGDP, na.rm = TRUE), 2),
                               IND = rep(mean(member_patents_enter$IND, na.rm = TRUE), 2),
                               ICT = rep(mean(member_patents_enter$ICT, na.rm = TRUE), 2))) 

